library(data.table)
library(ggplot2)
library(lfe)
library(readstata13)
library(stringr)
library(scales)
library(Hmisc)
library(stargazer)


# Load the two inputs used throughout the script.
hmda.in <- fread('../data/loans07to17conventional.csv') # HMDA data: conventional loans, 2007-2017
bd <- fread('../data/bankdata_conventional.csv')        # Bank capitalization data

# Left-join the bank capitalization measures onto every loan by (rssdid, year);
# loans without a matching bank-year row are kept with NA bank measures.
# The merge previously read from an object `cr` that this script never creates;
# `bd` is the only bank-level table loaded here, so it is used as the source.
# NOTE(review): assumes bd carries cr, total_assets and cr_gap -- confirm.
with.avery <- merge(
  hmda.in,
  bd[, c('rssdid', 'year', 'cr', 'total_assets', 'cr_gap'), with = FALSE],
  by = c('rssdid', 'year'),
  all.x = TRUE
)

# Loan-level derived columns. None of these depends on another, so they are
# added in a single by-reference assignment.
with.avery[, `:=`(
  conf.pct         = loan_amount / conforming_limit,              # loan size relative to the conforming limit
  gse              = as.integer(purchaser_type %in% 1:4),         # 1 when purchaser_type is 1-4
  commercial       = as.integer(purchaser_type %in% 6),           # 1 when purchaser_type is 6
  other.purchaser  = as.integer(purchaser_type %in% c(5, 7, 9)),  # 1 when purchaser_type is 5, 7 or 9
  applicant_income = as.numeric(income)
)]

# Size group: cut2(g = 2) splits total_assets into two quantile groups and
# ave() replaces each value with its group mean, so as.cutoff takes one value
# per size group and serves as the grouping key below.
with.avery[, as.cutoff := ave(total_assets, cut2(total_assets, g = 2))]

# 75th percentile of cr within each year x size group (NAs ignored) ...
with.avery[, cr.cutoff := quantile(cr, probs = .75, na.rm = TRUE), by = c('year', 'as.cutoff')]
# ... and a flag for observations strictly above that percentile.
with.avery[, high.cr := as.integer(cr > cr.cutoff)]

# Lender type: 'B' where bank == 1, 'S' for every other row.
with.avery[, type := 'S']
with.avery[bank == 1, type := 'B']

##### Tables 1A ####
# Loan counts and mean held / commercial / gse / other.purchaser for 2007-2017:
# first pooled over all lenders (type = 'all'), then separately by lender type.
summary0 <- with.avery[
  year %in% 2007:2017,
  .(
    type  = 'all',
    .N,
    bal   = mean(held),
    bank  = mean(commercial),
    gse   = mean(gse),
    other = mean(other.purchaser)
  )
]
summary1 <- with.avery[
  year %in% 2007:2017,
  .(
    .N,
    bal   = mean(held),
    bank  = mean(commercial),
    gse   = mean(gse),
    other = mean(other.purchaser)
  ),
  by = c('type')
]
# Stack the pooled row on top of the per-type rows.
s <- rbind(summary0, summary1)

# NOTE: plain assignment of a data.table does not copy it; toReg refers to the
# same table as with.avery, so later := changes are visible through both names.
toReg <- with.avery


## Figure 1 ##


# Panel A: Jumbo market shares
# Loan-amount-weighted mean of the jumbo indicator per year (loan_type == 1),
# plotted in percent.
byYear <- with.avery[
  loan_type == 1,
  .(pct.jumbo = weighted.mean(jumbo, w = loan_amount, na.rm = TRUE)),
  by = year
]
ggplot(byYear) +
  geom_line(aes(x = year, y = 100 * pct.jumbo)) +
  theme_bw() +
  xlab(NULL) +
  ylab(NULL) +
  scale_y_continuous(labels = function(x) {paste0(x, '%')}) +
  scale_x_continuous(breaks = seq(2007, 2017, by = 2))
ggsave('figure_1_panel_a.png', height = 3, width = 5, units = 'in')

# Panel B: Volumes
# Total loan_amount / 1e6 per year and jumbo status (loan_type == 1).
byYear <- with.avery[
  loan_type == 1,
  .(volume = sum(loan_amount, na.rm = TRUE) / 1e6),
  by = c('year', 'jumbo')
]
# Label every row 'Conforming', then overwrite the jumbo == 1 rows.
byYear[, type := 'Conforming']
byYear[jumbo == 1, type := 'Jumbo']
ggplot(byYear) +
  geom_bar(
    aes(x = year, y = volume, group = type, fill = type),
    stat = 'identity',
    position = 'dodge'
  ) +
  theme_bw() +
  scale_fill_brewer(palette = 'Set1') +
  theme(legend.position = 'none') +
  # Argument spelled out in full (`labels`) rather than relying on partial
  # matching of `label`.
  scale_y_continuous(labels = comma) +
  xlab(NULL) +
  ylab(NULL) +
  scale_x_continuous(breaks = seq(2007, 2017, by = 2))
ggsave('figure_1_panel_b.png', height = 3, width = 5, units = 'in')

## Figure 2 Panel A: Market Shares ##
# Loan-amount-weighted share of lending by lenders whose type is not 'S',
# per year and jumbo status (loan_type == 1, rows with missing jumbo dropped).
share_by_type <- with.avery[
  loan_type == 1 & !is.na(jumbo),
  .(m = weighted.mean(type != 'S', w = loan_amount)),
  by = c('year', 'jumbo')
]
share_by_type[, type := 'Conforming']
share_by_type[jumbo == 1, type := 'Jumbo']
ggplot(share_by_type) +
  geom_line(aes(x = year, y = m, group = type, color = type)) +
  theme_bw() +
  scale_color_brewer(palette = 'Set1') +
  theme(legend.position = 'none') +
  xlab(NULL) +
  ylab(NULL) +
  scale_x_continuous(breaks = seq(2007, 2017, by = 2)) +
  scale_y_continuous(labels = function(x) {paste0(x * 100, '%')})
ggsave('figure_2_panel_a.png', height = 3, width = 5, units = 'in')

## Figure 2 Panel B: Origination Shares
# Loan-amount-weighted jumbo share of lending per year and lender type; one
# line per type, distinguished by linetype.
share_by_type <- with.avery[
  loan_type == 1 & !is.na(jumbo) & type != 'O',
  .(m = weighted.mean(jumbo, w = loan_amount)),
  by = c('year', 'type')
]
ggplot(share_by_type) +
  geom_line(aes(x = year, y = m, group = type, linetype = type)) +
  theme_bw() +
  theme(legend.position = 'none') +
  xlab(NULL) +
  ylab(NULL) +
  scale_x_continuous(breaks = seq(2007, 2017, by = 2)) +
  scale_y_continuous(labels = function(x) {paste0(x * 100, '%')})
ggsave('figure_2_panel_b.png', height = 3, width = 5, units = 'in')

## Figures 3, 7
# Bin loans by size relative to the conforming limit. Breaks sit at
# 0, .51, .56, ..., 1.51 and each interval is labelled .50, .55, ..., 1.50, so
# the bucket labelled 1 covers (0.96, 1.01] and the first bucket is (0, 0.51].
with.avery[, conf.bucket := cut(
  conf.pct,
  breaks = c(0, seq(.5 + .01, 1.5 + .01, by = 0.05)),
  labels = c(seq(.5, 1.5, by = 0.05))
)]

# Per-bucket means for 2007-2017 loans above 40% of the limit. pct.high weights
# high.cr by the bank indicator.
byBin <- with.avery[
  year %in% 2007:2017 & conf.pct > .4,
  .(
    pct.retained = mean(held, na.rm = TRUE),
    pct.bank     = mean(type != 'S', na.rm = TRUE),
    pct.high     = weighted.mean(high.cr, w = bank, na.rm = TRUE),
    income       = mean(applicant_income, na.rm = TRUE)
  ),
  by = 'conf.bucket'
]

# Panel A: pct.retained by bucket; dashed line marks the conforming limit.
ggplot(byBin[!is.na(conf.bucket)]) +
  geom_point(aes(x = as.numeric(as.character(conf.bucket)), y = pct.retained), shape = 1, size = 2) +
  theme_bw() +
  geom_vline(xintercept = 1, linetype = 'dashed') +
  scale_y_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  scale_x_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  xlab(NULL) +
  ylab(NULL)
ggsave('figure_3_panel_a.png', height = 3, width = 5, units = 'in')

# Panel B: share of loans with type other than 'S' by bucket.
ggplot(byBin[!is.na(conf.bucket)]) +
  geom_point(aes(x = as.numeric(as.character(conf.bucket)), y = pct.bank), shape = 1, size = 2) +
  theme_bw() +
  geom_vline(xintercept = 1, linetype = 'dashed') +
  scale_y_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  scale_x_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  xlab(NULL) +
  ylab(NULL)
ggsave('figure_3_panel_b.png', height = 3, width = 5, units = 'in')

# positive.crgap is not created anywhere in this script. Define it here unless
# the input data already supplies it.
# NOTE(review): assumes "positive" means cr_gap > 0 -- confirm.
if (!'positive.crgap' %in% names(with.avery)) {
  with.avery[, positive.crgap := as.integer(cr_gap > 0)]
}

# Panel C: share of bank loans with positive.crgap set, and mean cr_gap, by bucket.
bybin.4c <- with.avery[
  conf.pct > .4 & bank == 1,
  .(
    pct.positive = mean(positive.crgap, na.rm = TRUE),
    avg.residual = mean(cr_gap, na.rm = TRUE)
  ),
  by = 'conf.bucket'
]
ggplot(bybin.4c[!is.na(conf.bucket)]) +
  geom_point(aes(x = as.numeric(as.character(conf.bucket)), y = pct.positive), shape = 1, size = 2) +
  theme_bw() +
  geom_vline(xintercept = 1, linetype = 'dashed') +
  scale_y_continuous(limits = c(0.35, 0.5), labels = function(x) {paste0(100 * x, '%')}) +
  scale_x_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  xlab(NULL) +
  ylab(NULL)
ggsave('figure3c_positive_crgap_scale.png', height = 3, width = 5, units = 'in')


## Figure 6
# Panel A: distribution of loans over conf.pct in 2.5pp buckets. Each interval
# (b, b + 0.025] is labelled with its left edge b; everything in (2, 200] falls
# in the bucket labelled 2, which is dropped from the plot below.
with.avery[, conf.bucket := cut(
  conf.pct,
  breaks = c(seq(0, 2, by = 0.025), 200),
  labels = c(seq(0, 2, by = 0.025))
)]
byBin <- with.avery[year %in% 2007:2017, .(n = .N), by = 'conf.bucket']
# Share of the 2007-2017 sample in each bucket. The denominator is taken from
# the same year-filtered counts (including the NA bucket) instead of
# nrow(with.avery), so any rows outside 2007-2017 no longer deflate the shares.
# as.numeric() guards against integer overflow in the sum.
byBin[, pct := n / sum(as.numeric(n))]
ggplot(byBin[!is.na(conf.bucket) & as.numeric(as.character(conf.bucket)) < 2]) +
  geom_line(aes(x = as.numeric(as.character(conf.bucket)), y = pct)) +
  xlab(NULL) +
  ylab(NULL) +
  theme_bw() +
  geom_vline(xintercept = 1, linetype = 'dashed') +
  scale_x_continuous(labels = percent) +
  scale_y_continuous(labels = percent) +
  geom_hline(yintercept = 0, linetype = 'dashed')
ggsave('figure_6_panel_a.png', height = 3, width = 5, units = 'in')


# Figure 6 Panel B: median applicant income by 2.5pp conf.pct bucket.
# Breaks sit at 0, .51, .535, ..., 1.51, labelled .5, .525, ..., 1.5.
with.avery[, conf.bucket := cut(
  conf.pct,
  breaks = c(0, seq(.5 + .01, 1.5 + .01, by = 0.025)),
  labels = c(seq(.5, 1.5, by = 0.025))
)]
byBin <- with.avery[
  year %in% 2007:2017 & conf.pct > .4,
  .(income = median(applicant_income, na.rm = TRUE), n = .N),
  by = 'conf.bucket'
]
# Keep only buckets whose label is within 0.16 of the conforming limit; point
# size scales with the bucket's loan count and a dashed linear fit is overlaid.
ggplot(byBin[!is.na(conf.bucket) & abs(as.numeric(as.character(conf.bucket)) - 1) < .16]) +
  geom_point(
    aes(x = as.numeric(as.character(conf.bucket)), y = income, size = n),
    shape = 1
  ) +
  theme_bw() +
  geom_smooth(
    aes(x = as.numeric(as.character(conf.bucket)), y = income),
    method = 'lm',
    linetype = 'dashed',
    color = 'black'
  ) +
  geom_vline(xintercept = 1, linetype = 'dashed') +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = function(x) {paste0(100 * x, '%')}) +
  xlab(NULL) +
  ylab(NULL) +
  theme(legend.position = 'none')
ggsave('figure_6_panel_b.png', height = 3, width = 5, units = 'in')





